(only 1/2 datasize to standard level)
Baf53cre ENS neurons,
CR infection 7d, CTL x4 CKO(Ahr-cko) x4
loading 60k cells for all,
cellranger called 24,758 cells
filt.10x <- Read10X(data.dir = "../output_demo/filtered_feature_bc_matrix/")
## 10X data contains more than one type and is being returned as a list containing matrices of each type.
GEX <- filt.10x$`Gene Expression`
FB <- filt.10x$`Antibody Capture`
dim(GEX)
## [1] 32285 24758
GEX[1:6,1:6]
## 6 x 6 sparse Matrix of class "dgCMatrix"
## AAACCCAAGAATTCAG-1 AAACCCAAGCGTATGG-1 AAACCCAAGGCAGGTT-1
## Xkr4 1 3 2
## Gm1992 . . 1
## Gm19938 . . .
## Gm37381 . . .
## Rp1 . . .
## Sox17 . . .
## AAACCCAAGGTTGGTG-1 AAACCCACAAATCGTC-1 AAACCCACAAGGGCAT-1
## Xkr4 1 3 1
## Gm1992 . . .
## Gm19938 . . .
## Gm37381 . . .
## Rp1 . . .
## Sox17 . . .
dim(FB)
## [1] 8 24758
FB[,1:6]
## 8 x 6 sparse Matrix of class "dgCMatrix"
## AAACCCAAGAATTCAG-1 AAACCCAAGCGTATGG-1 AAACCCAAGGCAGGTT-1
## CTL.1 118 98 92
## CTL.2 22 13 19
## CTL.3 24 92 26
## CTL.4 106 69 34
## CKO.1 49 41 36
## CKO.2 359 53 44
## CKO.3 76 364 198
## CKO.4 220 735 164
## AAACCCAAGGTTGGTG-1 AAACCCACAAATCGTC-1 AAACCCACAAGGGCAT-1
## CTL.1 96 112 96
## CTL.2 16 25 21
## CTL.3 17 15 25
## CTL.4 48 31 23
## CKO.1 38 50 54
## CKO.2 380 64 38
## CKO.3 71 232 289
## CKO.4 167 206 192
rowSums(FB)
## CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2 CKO.3 CKO.4
## 5238102 901560 1048906 1403495 1923599 3230527 3292731 7463190
rowSums(FB>0)
## CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2 CKO.3 CKO.4
## 24757 24706 24738 24746 24736 24755 24758 24758
# build seurat object
FB.seur <- CreateSeuratObject(counts = FB,project = "CR7d_LI")
FB.seur
## An object of class Seurat
## 8 features across 24758 samples within 1 assay
## Active assay: RNA (8 features, 0 variable features)
rownames(FB.seur)
## [1] "CTL.1" "CTL.2" "CTL.3" "CTL.4" "CKO.1" "CKO.2" "CKO.3" "CKO.4"
perform Seurat ’Demultiplexing with hashtag oligos(HTOs)
ref https://satijalab.org/seurat/articles/hashing_vignette.html
# normalize
FB.seur <- NormalizeData(FB.seur)
FB.seur <- FindVariableFeatures(FB.seur, selection.method = "mean.var.plot")
FB.seur <- ScaleData(FB.seur, features = VariableFeatures(FB.seur))
## Centering and scaling data matrix
# independent assay for HTO data
FB.seur[['HTO']] <- CreateAssayObject(counts = FB)
FB.seur <- NormalizeData(FB.seur, assay = 'HTO', normalization.method = 'CLR')
## Normalizing across features
first define FB colors based on conditions
color.FB <- ggsci::pal_d3("category20c")(20)[c(1,6,11,16,
2,7,12,17
)]
level.FB <- c(rownames(FB.seur))
color.FBraw <- c("#FF6C91","lightgrey",color.FB)
scales::show_col(color.FBraw, ncol = 6)
scales::show_col(color.FB, ncol = 4)
par(mfrow=c(2,4))
#plot(sort(rowSums(t(FB))),
# xlab="reordered cells",
# ylab="UMI counts", log="xy",
# main="sum")
plot(sort(t(FB)[,1]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[1])
plot(sort(t(FB)[,2]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[2])
plot(sort(t(FB)[,3]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[3])
plot(sort(t(FB)[,4]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[4])
plot(sort(t(FB)[,5]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[5])
plot(sort(t(FB)[,6]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[6])
plot(sort(t(FB)[,7]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[7])
plot(sort(t(FB)[,8]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[8])
table.demux
## quantile Doublet CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2 CKO.3 CKO.4 Negative
## 1 0.50 23881 80 128 110 84 83 89 94 82 127
## 2 0.51 23871 80 129 110 86 85 90 97 82 128
## 3 0.52 23831 82 131 115 96 87 95 103 86 132
## 4 0.53 23817 83 134 120 97 88 95 105 87 132
## 5 0.54 23765 89 140 129 98 97 104 108 89 139
## 6 0.55 23718 99 148 125 100 102 107 112 97 150
## 7 0.56 23652 105 143 138 105 111 115 122 107 160
## 8 0.57 23595 115 152 149 110 112 119 127 112 167
## 9 0.58 23574 117 157 150 113 117 120 130 112 168
## 10 0.59 23503 118 169 150 124 129 128 135 123 179
## 11 0.60 23474 121 172 157 129 129 130 138 125 183
## 12 0.61 23410 128 181 169 132 133 140 143 136 186
## 13 0.62 23307 135 182 184 149 142 152 154 152 201
## 14 0.63 23262 137 188 180 159 149 158 157 157 211
## 15 0.64 23195 140 196 189 166 159 167 164 164 218
## 16 0.65 23144 144 207 198 175 158 174 165 169 224
## 17 0.66 23047 155 208 204 189 165 189 181 184 236
## 18 0.67 22934 169 224 219 199 177 208 192 195 241
## 19 0.68 22893 174 227 223 204 187 210 199 196 245
## 20 0.69 22797 182 249 237 213 194 223 209 201 253
## 21 0.70 22728 188 255 240 224 207 229 219 206 262
## 22 0.71 22568 205 258 267 249 221 249 241 220 280
## 23 0.72 22493 208 268 279 248 229 257 256 228 292
## 24 0.73 22386 222 280 289 265 236 268 270 237 305
## 25 0.74 22305 234 297 302 269 240 278 281 243 309
## 26 0.75 22140 259 303 306 291 265 295 303 271 325
## 27 0.76 22040 269 316 323 310 274 305 312 276 333
## 28 0.77 21891 277 344 348 317 281 330 330 293 347
## 29 0.78 21707 297 347 368 345 305 356 350 310 373
## 30 0.79 21545 315 376 396 351 318 375 376 318 388
## 31 0.80 21357 332 410 420 372 340 399 396 334 398
## 32 0.81 21195 345 411 440 398 363 414 417 361 414
## 33 0.82 20978 362 445 461 415 382 434 449 381 451
## 34 0.83 20848 375 469 485 435 389 444 455 392 466
## 35 0.84 20545 410 493 529 465 419 484 496 423 494
## 36 0.85 20370 431 523 552 486 432 503 512 435 514
## 37 0.86 20094 456 539 584 520 475 531 546 461 552
## 38 0.87 19883 477 576 606 533 497 550 574 485 577
## 39 0.88 19518 518 604 660 577 538 596 615 522 610
## 40 0.89 19272 544 643 691 599 568 622 643 540 636
## 41 0.90 18823 596 680 746 647 621 674 694 592 685
## 42 0.91 18441 638 705 782 695 668 715 752 624 738
## 43 0.92 18013 689 763 826 748 701 763 797 662 796
## 44 0.93 17535 744 807 884 800 747 803 862 719 857
## 45 0.94 16913 818 878 956 857 812 869 938 791 926
## 46 0.95 16161 885 944 1036 927 890 972 1022 894 1027
## 47 0.96 15334 971 1012 1133 1022 991 1053 1122 969 1151
## 48 0.97 14456 1059 1114 1231 1086 1085 1148 1212 1043 1324
## 49 0.98 13099 1193 1253 1403 1203 1206 1317 1349 1155 1580
## 50 0.99 11326 1336 1412 1597 1351 1353 1506 1545 1268 2064
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))
plot(table.demux$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux[,grep("CTL|CKO",colnames(table.demux))]), type="p", col="#306000", pch=16, ylab="Singlet")
#plot(table.demux$quantile, pch=16, ylab="mod-quantile")
plot.new()
plot(table.demux[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux)[2+1])
plot(table.demux[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux)[2+2])
plot(table.demux[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux)[2+3])
plot(table.demux[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux)[2+4])
plot(table.demux[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux)[2+5])
plot(table.demux[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux)[2+6])
plot(table.demux[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux)[2+7])
plot(table.demux[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux)[2+8])
#table.demux.s
#color.FB <- scales::hue_pal()(10)
par(mfrow=c(3,4))
plot(table.demux.s$Doublet, type="p", col="#FF6C91", pch=16, ylab="Doublet")
plot(table.demux.s$Negative, type="p", col="lightgrey", pch=16, ylab="Negative")
plot(rowSums(table.demux.s[,grep("CTL|CKO",colnames(table.demux.s))]), type="p", col="#306000", pch=16, ylab="Singlet")
#plot(table.demux.s$quantile, pch=16, ylab="mod-quantile")
plot.new()
plot(table.demux.s[,2+1], type="p", col=color.FB[1], pch=16, ylab=colnames(table.demux.s)[2+1])
plot(table.demux.s[,2+2], type="p", col=color.FB[2], pch=16, ylab=colnames(table.demux.s)[2+2])
plot(table.demux.s[,2+3], type="p", col=color.FB[3], pch=16, ylab=colnames(table.demux.s)[2+3])
plot(table.demux.s[,2+4], type="p", col=color.FB[4], pch=16, ylab=colnames(table.demux.s)[2+4])
plot(table.demux.s[,2+5], type="p", col=color.FB[5], pch=16, ylab=colnames(table.demux.s)[2+5])
plot(table.demux.s[,2+6], type="p", col=color.FB[6], pch=16, ylab=colnames(table.demux.s)[2+6])
plot(table.demux.s[,2+7], type="p", col=color.FB[7], pch=16, ylab=colnames(table.demux.s)[2+7])
plot(table.demux.s[,2+8], type="p", col=color.FB[8], pch=16, ylab=colnames(table.demux.s)[2+8])
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.99)
## Cutoff for CTL.1 : 191 reads
## Cutoff for CTL.2 : 33 reads
## Cutoff for CTL.3 : 45 reads
## Cutoff for CTL.4 : 55 reads
## Cutoff for CKO.1 : 73 reads
## Cutoff for CKO.2 : 129 reads
## Cutoff for CKO.3 : 109 reads
## Cutoff for CKO.4 : 301 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 11326 2064 11368
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 11326 2064 1336 1412 1597 1351 1353 1506
## CKO.3 CKO.4
## 1545 1268
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.992)
## Cutoff for CTL.1 : 199 reads
## Cutoff for CTL.2 : 34 reads
## Cutoff for CTL.3 : 46 reads
## Cutoff for CTL.4 : 57 reads
## Cutoff for CKO.1 : 76 reads
## Cutoff for CKO.2 : 135 reads
## Cutoff for CKO.3 : 113 reads
## Cutoff for CKO.4 : 312 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 10851 2207 11700
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 10851 2207 1375 1458 1640 1381 1391 1551
## CKO.3 CKO.4
## 1590 1314
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.994)
## Cutoff for CTL.1 : 209 reads
## Cutoff for CTL.2 : 36 reads
## Cutoff for CTL.3 : 49 reads
## Cutoff for CTL.4 : 60 reads
## Cutoff for CKO.1 : 79 reads
## Cutoff for CKO.2 : 142 reads
## Cutoff for CKO.3 : 118 reads
## Cutoff for CKO.4 : 325 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 10217 2436 12105
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 10217 2436 1464 1506 1686 1422 1444 1597
## CKO.3 CKO.4
## 1637 1349
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.996)
## Cutoff for CTL.1 : 223 reads
## Cutoff for CTL.2 : 38 reads
## Cutoff for CTL.3 : 52 reads
## Cutoff for CTL.4 : 63 reads
## Cutoff for CKO.1 : 85 reads
## Cutoff for CKO.2 : 153 reads
## Cutoff for CKO.3 : 125 reads
## Cutoff for CKO.4 : 345 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 9524 2755 12479
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 9524 2755 1524 1549 1727 1481 1481 1650
## CKO.3 CKO.4
## 1701 1366
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.998)
## Cutoff for CTL.1 : 247 reads
## Cutoff for CTL.2 : 42 reads
## Cutoff for CTL.3 : 58 reads
## Cutoff for CTL.4 : 69 reads
## Cutoff for CKO.1 : 94 reads
## Cutoff for CKO.2 : 171 reads
## Cutoff for CKO.3 : 137 reads
## Cutoff for CKO.4 : 377 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 8385 3303 13070
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 8385 3303 1628 1661 1780 1524 1565 1730
## CKO.3 CKO.4
## 1774 1408
# Demultiplex cells based on HTO enrichment
FB.seur <- HTODemux(FB.seur, assay = 'HTO', positive.quantile = 0.999)
## Cutoff for CTL.1 : 271 reads
## Cutoff for CTL.2 : 46 reads
## Cutoff for CTL.3 : 63 reads
## Cutoff for CTL.4 : 76 reads
## Cutoff for CKO.1 : 103 reads
## Cutoff for CKO.2 : 190 reads
## Cutoff for CKO.3 : 149 reads
## Cutoff for CKO.4 : 409 reads
table(FB.seur$HTO_classification.global)
##
## Doublet Negative Singlet
## 7484 3927 13347
table(FB.seur$hash.ID)[c("Doublet","Negative",rownames(FB))]
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 7484 3927 1669 1663 1828 1521 1607 1799
## CKO.3 CKO.4
## 1821 1439
# manual cutoffs based on default values
#
# CTL.1 q0.998 247
# CTL.2 q0.998 42
# CTL.3 q0.998 58
# CTL.4 q0.998 69
# CKO.1 q0.998 94
# CKO.2 q0.998 171
# CKO.3 q0.998 137
# CKO.4 q0.998 377
#
cutoff.FB <- c(247,42,58,69,
94,171,137,377)
names(cutoff.FB) <- rownames(FB.seur)
cutoff.FB
## CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2 CKO.3 CKO.4
## 247 42 58 69 94 171 137 377
par(mfrow=c(2,4))
#plot(sort(rowSums(t(FB))),
# xlab="reordered cells",
# ylab="UMI counts", log="xy",
# main="sum")
plot(sort(t(FB)[,1]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])
plot(sort(t(FB)[,5]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])
plot(sort(t(FB)[,6]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8]),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])
par(mfrow=c(2,4))
#plot(sort(rowSums(t(FB))),
# xlab="reordered cells",
# ylab="UMI counts", log="xy",
# main="sum")
plot(sort(t(FB)[,1], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[1])
abline(h=cutoff.FB[1],col = color.FB[1])
plot(sort(t(FB)[,2], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[2])
abline(h=cutoff.FB[2],col = color.FB[2])
plot(sort(t(FB)[,3], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[3])
abline(h=cutoff.FB[3],col = color.FB[3])
plot(sort(t(FB)[,4], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[4])
abline(h=cutoff.FB[4],col = color.FB[4])
plot(sort(t(FB)[,5], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[5])
abline(h=cutoff.FB[5],col = color.FB[5])
plot(sort(t(FB)[,6], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[6])
abline(h=cutoff.FB[6],col = color.FB[6])
plot(sort(t(FB)[,7], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[7])
abline(h=cutoff.FB[7],col = color.FB[7])
plot(sort(t(FB)[,8], decreasing = T),
xlab="reordered cells",
ylab="UMI counts", log="xy",
main=rownames(FB)[8])
abline(h=cutoff.FB[8],col = color.FB[8])
## custom cutoff run this
custom.Demux <- function(FBobj,FBcutoff){
# define decision matrix
dd <- FBobj@assays[['HTO']]@counts
dd <- apply(dd, 2, function(x){
x > FBcutoff
})
x1 <- colSums(dd)
x1[x1>1] <- "Doublet"
x1[x1==0] <- "Negative"
x1[x1==1] <- "Singlet"
x2 <- x1
bc.slt <- names(x1)[x1=="Singlet"]
for(bc in bc.slt){
x2[bc] <- rownames(dd)[dd[,bc]]
}
FBdf <- data.frame(HTO_classification.global=factor(x1,
levels = c("Doublet","Negative","Singlet")),
hash.ID=factor(x2,
levels = c("Doublet","Negative",rownames(dd))))
return(FBdf)
}
df.FB <- custom.Demux(FB.seur,cutoff.FB)
## custom cutoff run this
dim(df.FB)
## [1] 24758 2
df.FB[1:10,]
## HTO_classification.global hash.ID
## AAACCCAAGAATTCAG-1 Doublet Doublet
## AAACCCAAGCGTATGG-1 Doublet Doublet
## AAACCCAAGGCAGGTT-1 Singlet CKO.3
## AAACCCAAGGTTGGTG-1 Singlet CKO.2
## AAACCCACAAATCGTC-1 Singlet CKO.3
## AAACCCACAAGGGCAT-1 Singlet CKO.3
## AAACCCACACCAGGTC-1 Doublet Doublet
## AAACCCACACGGCACT-1 Singlet CKO.4
## AAACCCACAGCAGTCC-1 Doublet Doublet
## AAACCCACAGCATTGT-1 Doublet Doublet
## custom cutoff run this
table(df.FB)
## hash.ID
## HTO_classification.global Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## Doublet 8385 0 0 0 0 0 0 0
## Negative 0 3303 0 0 0 0 0 0
## Singlet 0 0 1628 1660 1769 1524 1562 1714
## hash.ID
## HTO_classification.global CKO.3 CKO.4
## Doublet 0 0
## Negative 0 0
## Singlet 1795 1418
## custom cutoff run this
FB.seur@meta.data[,c("HTO_classification.global","hash.ID")] <- df.FB
FB.seur@meta.data[1:4,]
## orig.ident nCount_RNA nFeature_RNA nCount_HTO nFeature_HTO
## AAACCCAAGAATTCAG-1 CR7d_LI 974 8 974 8
## AAACCCAAGCGTATGG-1 CR7d_LI 1465 8 1465 8
## AAACCCAAGGCAGGTT-1 CR7d_LI 613 8 613 8
## AAACCCAAGGTTGGTG-1 CR7d_LI 833 8 833 8
## HTO_maxID HTO_secondID HTO_margin HTO_classification
## AAACCCAAGAATTCAG-1 CKO.2 CTL.4 0.6061048 CKO.2_CTL.4
## AAACCCAAGCGTATGG-1 CKO.3 CTL.3 0.1276493 CKO.3_CTL.3
## AAACCCAAGGCAGGTT-1 CKO.3 CTL.3 0.4757557 CKO.3
## AAACCCAAGGTTGGTG-1 CKO.2 CTL.4 1.1542604 CKO.2
## HTO_classification.global hash.ID
## AAACCCAAGAATTCAG-1 Doublet Doublet
## AAACCCAAGCGTATGG-1 Doublet Doublet
## AAACCCAAGGCAGGTT-1 Singlet CKO.3
## AAACCCAAGGTTGGTG-1 Singlet CKO.2
## default q0.99 run this
#FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
# levels = c("Doublet","Negative","Singlet"))
#FB.seur$hash.ID <- factor(as.character(FB.seur$hash.ID),
# levels = c("Doublet","Negative",rownames(FB.seur)))
sl_stat <- table(FB.seur$HTO_classification.global)
barplot(sl_stat,ylim = c(0,23500),col = c("#FF6C91","lightgrey","#7CAE00"),main = "Feature Barcode statistics")
text(x=1:3*1.2-0.5,y=sl_stat+1680,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
FB.seur@active.ident <- factor(FB.seur$HTO_maxID,levels=rownames(FB.seur))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]),same.y.lims= TRUE, ncol=4,
cols = color.FB)
FB.seur@meta.data$hash.ID.sort <- factor(as.character(FB.seur@meta.data$hash.ID),levels = c("Doublet","Negative",levels(FB.seur@active.ident)))
RidgePlot(FB.seur, assay = 'HTO', features = rownames(FB.seur[['HTO']]), ncol=4,same.y.lims= TRUE,
cols = c("#FF6C91","lightgrey",color.FB),group.by = "hash.ID.sort")
# first, remove negative cells from th object
#FB.seur.subset <- FB.seur
# Calculate a tSNE embedding of the HTO data
#DefaultAssay(FB.seur.subset) <- "HTO"
#FB.seur.subset <- ScaleData(FB.seur.subset, features = rownames(FB.seur.subset), verbose = FALSE)
#FB.seur.subset <- RunPCA(FB.seur.subset, features = rownames(FB.seur.subset), approx = FALSE)
#FB.seur.subset <- RunTSNE(FB.seur.subset, dims = 1:8, perplexity = 500, check_duplicates = FALSE)
#saveRDS(FB.seur.subset, "FB0618.seur.subset.rds")
FB.seur.subset <- readRDS("FB0618.seur.subset.rds")
multiplot(
DimPlot(FB.seur.subset, order = rev(levels(FB.seur@active.ident)),
cols = color.FB) + labs(title = "FB Max_ID"),
DimPlot(FB.seur.subset, order = c("Doublet",rev(levels(FB.seur@active.ident)),"Negative"), group.by = 'hash.ID', label = F,
cols = c("lightgrey",color.FB,"#FF6C91")) +
labs(title = "FB Filtered_ID") + theme(plot.title = element_text(hjust = 0)) ,
cols = 1)
##
## Attaching package: 'grid'
## The following object is masked from 'package:Biostrings':
##
## pattern
FB.seur$HTO_classification.global <- factor(as.character(FB.seur$HTO_classification.global),
levels = c("Doublet","Negative","Singlet"))
Idents(FB.seur) <- "HTO_classification.global"
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38"))
VlnPlot(FB.seur, features = "nCount_RNA", pt.size = 0, log = TRUE, cols = c("#F8766D","lightgrey","#00BA38")) + geom_jitter(alpha=0.015)
table(FB.seur@meta.data$hash.ID.sort)
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 8385 3303 1628 1660 1769 1524 1562 1714
## CKO.3 CKO.4
## 1795 1418
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,12800),
col = c("#FF6C91","lightgrey",color.FB),
main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+875,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
mainly follow https://satijalab.org/seurat/articles/pbmc3k_tutorial.html
GEX.seur <- CreateSeuratObject(counts = GEX,
min.cells = 3,
min.features = 200,
project = "CR7d_LI")
GEX.seur
## An object of class Seurat
## 20978 features across 24758 samples within 1 assay
## Active assay: RNA (20978 features, 0 variable features)
grep("^mt-",rownames(GEX),value = T)
## [1] "mt-Nd1" "mt-Nd2" "mt-Co1" "mt-Co2" "mt-Atp8" "mt-Atp6" "mt-Co3"
## [8] "mt-Nd3" "mt-Nd4l" "mt-Nd4" "mt-Nd5" "mt-Nd6" "mt-Cytb"
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
GEX.seur[["percent.mt"]] <- PercentageFeatureSet(GEX.seur, features = MT_gene)
RB_gene <- grep("^Rps|^Rpl",rownames(GEX.seur),value = T)
GEX.seur[["percent.rb"]] <- PercentageFeatureSet(GEX.seur, features = RB_gene)
# Visualize QC metrics as a violin plot
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.01)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt")
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc
GEX.seur$FB.info <- FB.seur@meta.data[rownames(GEX.seur@meta.data),"hash.ID"]
#head(GEX.seur@meta.data)
table(GEX.seur$FB.info)
##
## Doublet Negative CTL.1 CTL.2 CTL.3 CTL.4 CKO.1 CKO.2
## 8385 3303 1628 1660 1769 1524 1562 1714
## CKO.3 CKO.4
## 1795 1418
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, split.by = "FB.info")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##
## This message will be shown once per session.
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0, split.by = "FB.info")
GEX.seur <- subset(GEX.seur, subset = FB.info!="Doublet" & FB.info!="Negative" )
GEX.seur
## An object of class Seurat
## 20978 features across 13070 samples within 1 assay
## Active assay: RNA (20978 features, 0 variable features)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0.1, split.by = "FB.info")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = c("#FF6C91","lightgrey",color.FB),
pt.size = 0, split.by = "FB.info")
colon neuron, there’s a strange low enrichment distribution under nFeature 500
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt")
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc
GEX.seur <- subset(GEX.seur, subset = percent.mt < 5 & nFeature_RNA > 200 & nFeature_RNA < 1800 & nCount_RNA < 3600)
GEX.seur
## An object of class Seurat
## 20978 features across 13055 samples within 1 assay
## Active assay: RNA (20978 features, 0 variable features)
filtered obj
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0.1)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rb"), ncol = 4, pt.size = 0)
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = color.FBraw,
pt.size = 0.1, split.by = "FB.info")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rb"),
#ncol = 3,
ncol = 2,
cols = color.FBraw,
pt.size = 0, split.by = "FB.info")
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt")
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plotc <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.rb")
plota + plotb + plotc
par(mar=c(6,4,2,3))
sl_stat <- table(FB.seur$hash.ID.sort)
barplot(sl_stat,ylim = c(0,10800),
col = c("#FF6C91","lightgrey",color.FB),
main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+675,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
par(mar=c(6,4,2,3))
sl_stat <- table(GEX.seur$FB.info)
barplot(sl_stat,ylim = c(0,2800),
col = c("#FF6C91","lightgrey",color.FB),
main = "Feature Barcode statistics",cex.names = 0.75, xaxt = "n")
axis(1,1:10*1.2-0.48 ,levels(FB.seur@meta.data$hash.ID.sort), las=3, cex.axis=0.85)
text(x=1:10*1.2-0.45,y=sl_stat+175,paste0(sl_stat,"\n",100*round(as.numeric(sl_stat/sum(sl_stat)),4),"%"),cex = 0.75)
s.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$s.genes))
g2m.genes <- Hmisc::capitalize(tolower(cc.genes.updated.2019$g2m.genes))
GEX.seur <- CellCycleScoring(GEX.seur,
s.features = s.genes,
g2m.features = g2m.genes)
## Warning: The following features are not present in the object: Cdca7, Uhrf1,
## Chaf1b, E2f8, not searching for symbol synonyms
## Warning: The following features are not present in the object: Cdk1, Ube2c,
## Top2a, Aurkb, Bub1, Gtse1, Cdc25c, Kif2c, Dlgap5, Hmmr, Nek2, not searching for
## symbol synonyms
VlnPlot(GEX.seur, features = c("S.Score", "G2M.Score"), group.by = "FB.info",
ncol = 2, pt.size = 0.1)
GEX.seur.cc <- GEX.seur
GEX.seur.cc <- NormalizeData(GEX.seur.cc)
GEX.seur.cc <- FindVariableFeatures(GEX.seur.cc)
GEX.seur.cc <- ScaleData(GEX.seur.cc)
Idents(GEX.seur.cc) <- "Phase"
RidgePlot(GEX.seur.cc, features = c("Pcna", "Top2a", "Mcm6", "Mki67"), ncol = 2)
GEX.seur.cc <- RunPCA(GEX.seur.cc, features = c(s.genes, g2m.genes))
DimPlot(GEX.seur.cc, reduction = 'pca')
GEX.seur <- NormalizeData(GEX.seur, normalization.method = "LogNormalize", scale.factor = 10000)
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 1500)
# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top20 <- head(VariableFeatures(GEX.seur), 20)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top20, repel = T)
plot1 + plot2
head(VariableFeatures(GEX.seur), 200)
## [1] "Cntnap2" "Mgat4c" "Zfp804a" "Cntn5"
## [5] "Nmu" "Robo2" "Cpne4" "Sgcz"
## [9] "Gm32647" "Lingo2" "Cdh8" "Kcnb2"
## [13] "Klhl1" "Sst" "Gm30613" "4930432L08Rik"
## [17] "Pcdh9" "Pcdh11x" "Dgkg" "Gm21847"
## [21] "Myh11" "Adgrg6" "Ntng1" "Kctd16"
## [25] "Csmd1" "Gm29521" "Vip" "Gpr149"
## [29] "Brinp3" "Nrxn3" "Nrg1" "Ebf1"
## [33] "Grm7" "Tmeff2" "Car10" "Astn2"
## [37] "Kirrel3" "Gm26612" "Sema3e" "Asic2"
## [41] "Gpc6" "Mmrn1" "Galntl6" "Chsy3"
## [45] "Wdr17" "Ano5" "Gal" "Kcnq5"
## [49] "Ano2" "Gpm6a" "Prkg1" "Cenpf"
## [53] "Unc5d" "Rbfox1" "Dgki" "Rab27b"
## [57] "Gm15680" "Cdh18" "Gpc5" "Schip1"
## [61] "March1" "Cdh9" "Gm30094" "Pdzrn4"
## [65] "Plxna4" "Itgb6" "Gm12296" "Gm15261"
## [69] "Chrm3" "M1ap" "B230323A14Rik" "Tac1"
## [73] "Gm47456" "Tafa1" "Plcl1" "Pbx3"
## [77] "Kcnh7" "Cpa6" "Grik1" "Lrp1b"
## [81] "Rbpms" "Spock3" "1700111E14Rik" "L3mbtl4"
## [85] "Trps1" "A330008L17Rik" "Xirp2" "Ptprt"
## [89] "Tafa2" "Clstn2" "D030068K23Rik" "Fut9"
## [93] "Grin3a" "Ntrk3" "Pcdh10" "Opcml"
## [97] "Zfp804b" "mt-Co3" "Col25a1" "Dcc"
## [101] "Ccbe1" "Grm8" "Nxph2" "Pde7b"
## [105] "Nxph1" "Zfhx3" "Cacna2d3" "Grp"
## [109] "Pde4d" "Cdh6" "4930509J09Rik" "Reln"
## [113] "mt-Atp6" "Rfx6" "Penk" "St8sia2"
## [117] "Cdh19" "Gna14" "Shisa6" "Adgrl4"
## [121] "Scnn1a" "Gm38405" "9530059O14Rik" "Gm16685"
## [125] "Fstl4" "Myl1" "Kctd8" "Gm28153"
## [129] "Unc13c" "Unc5c" "Kcnd2" "Vgll3"
## [133] "Alk" "Actg2" "AC150683.1" "1700034P13Rik"
## [137] "Inpp4b" "Calcb" "Cysltr2" "Gm20757"
## [141] "Robo1" "Trhde" "Erbb4" "Smtn"
## [145] "Adarb2" "Dgkb" "Piezo2" "Kif26b"
## [149] "6330411D24Rik" "Gm49127" "Prox1" "Cmah"
## [153] "Sema5a" "Dock10" "Abca9" "Skap1"
## [157] "Hpse2" "Gm11339" "Casp8" "Dlc1"
## [161] "Tenm4" "Trp53cor1" "Avil" "Gm20754"
## [165] "Cfh" "Efr3a" "Gm29282" "Gm13376"
## [169] "Bloc1s6os" "Rbm46" "Muc16" "Ndufa4l2"
## [173] "Gm2670" "Ccdc68" "Gm34517" "Chrna9"
## [177] "Lgals9" "Galnt18" "1700063D05Rik" "Gm49678"
## [181] "Cntn4" "E130310I04Rik" "Hcn1" "Gm826"
## [185] "Bmpr1b" "Dock1" "Dlgap1" "9530026P05Rik"
## [189] "Slc4a4" "Gm12648" "Foxp2" "Nell1"
## [193] "Luzp2" "Dab1" "Gm4128" "Stac"
## [197] "5530401A14Rik" "Kcnmb2" "Gm20713" "Il1rapl1"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix
# exclude MT genes and more
#DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^Gm|^Hist",rownames(GEX.seur),value = T)
DIG <- grep("^Tra|^Trb|^Trg|^Trd|^Tcr|^Igm|^Igh|^Igk|^Igl|Jchain|Mzb1|Vpreb|Lars2|Jun|Fos|^Hsp|^Rps|^Rpl|Hbb-|Hba-|^Dnaj|^AY|^AC|^AA|^AW|^BC|^Gm|^Hist|Rik$|-ps",
rownames(GEX.seur),value = T)
CC_gene <- Hmisc::capitalize(tolower(as.vector(unlist(cc.genes.updated.2019))))
VariableFeatures(GEX.seur) <- setdiff(VariableFeatures(object = GEX.seur),
c(MT_gene,
DIG,
CC_gene) )
GEX.seur <- RunPCA(GEX.seur, features = VariableFeatures(GEX.seur))
## PC_ 1
## Positive: Nos1, Cmah, Epha5, Cacnb2, Rgs6, Thsd7a, Hmcn1, Slc44a5, Gfra1, Alcam
## Thsd7b, Lrrc4c, Dgkb, Syn3, Etv1, Cadps2, Dpp10, Rarb, Stxbp6, Vwa5b1
## Creb5, Auts2, Col25a1, Rgs7, Synpo2, Bves, Dach2, Popdc3, Trpc4, Arhgap15
## Negative: Tafa1, Rbfox1, Gpc6, Pde4b, Unc5c, Alk, Plxna4, Plcl1, Ptprt, Bnc2
## Stxbp5l, Lingo2, Tafa2, Mgat4c, Pbx1, Cpne8, Nell1, Grik1, Pbx3, Cdh8
## Pld5, Unc5d, Cntnap2, Kalrn, Kctd8, Cdh18, Zfhx3, Grm7, Chsy3, Ccbe1
## PC_ 2
## Positive: Auts2, Mgat4c, Sgcz, Robo2, Cdh8, Zfp804a, Tmeff2, Dgkg, Kcnb2, Dgkb
## Nmu, Ano2, Kcnq5, Schip1, Slc4a4, Cntn5, Astn2, Cpne4, Brinp3, Ntrk3
## Gpr149, Myl1, Cdh6, Fgf14, Itgb6, Zfhx3, Pbx3, Clstn2, Ntng1, Plcl1
## Negative: Grik1, Bnc2, Galntl6, Tshz2, Pcdh7, Ptprt, Pde4b, Tox, Dlgap2, Cdc14a
## Lrp1b, Plcxd3, Alk, Nkain2, Tafa2, St6galnac3, Arhgap24, Tmem132c, Oprk1, Gfra2
## Cacna1e, Olfm3, Brinp2, Pld5, Rbfox1, Nxph1, Fhit, Satb1, Colec12, Unc5d
## PC_ 3
## Positive: Dlgap1, Tenm2, Schip1, Pde4d, Kctd16, Dock10, Kctd8, L3mbtl4, Skap1, Unc5d
## Egfem1, Stac, Kcnd2, Nhs, Grm7, Dmd, Tac1, Kirrel3, Fut9, Sybu
## Sema3e, Pde7b, Plcb1, Arhgef3, Egfr, Col27a1, Asic2, Pde1c, Pbx3, Col25a1
## Negative: Cpne4, Tshz2, Cdh8, Ccbe1, Ptprt, Chsy3, Mgat4c, Nmu, Nrxn3, Ano2
## Ntrk3, Tcf7l2, B3galt1, Plxna4, Gpr149, Cux2, Dgkg, Nfia, Pcdh9, Itgb6
## Slc2a13, Galnt17, Scube1, Clstn2, Calcb, Vgll3, Rbfox1, Ptprd, Cdc14a, Dgki
## PC_ 4
## Positive: Car10, Nxph1, Cntn5, Synpr, Epha5, Lrp1b, Dmd, Unc5d, Csmd3, Prkg1
## Zfp804b, Dock10, Pcdh9, Fstl4, Kctd8, Erbb4, Cntnap2, Calcrl, Epha6, Pcdh11x
## Olfm3, Pde1c, Nmu, Galntl6, Cdh8, Pcdh10, Cacna2d3, Spock3, Lsamp, Rtl4
## Negative: Ntng1, Klhl1, Tafa2, Kcnh7, Gna14, Nxph2, Zfhx3, Kif26b, Flrt2, Bmpr1b
## Pbx3, Ano5, Enox1, Csmd1, Skap1, Dlc1, L3mbtl4, Tmtc2, Trpm3, Galnt18
## Tnr, Trps1, Pbx1, Slc44a5, Zbbx, Pcdh7, Tenm4, Sez6l, Serpini1, Pakap
## PC_ 5
## Positive: Ntng1, Kcnd2, Trps1, Cntn5, Klhl1, Sgcz, Csmd3, Ebf1, Csmd1, Nkain2
## Cdh18, Gna14, Pcdh7, Kif26b, Spock3, Gng2, Rbfox1, Tnr, Schip1, Nxph2
## Bmpr1b, Kcnq5, Camk4, Cacna1e, Trhde, Slit2, Spock1, Alk, Nrg1, Cpne8
## Negative: Sema3e, Pde7b, Kctd16, Fut9, Egfr, Lingo2, Fras1, Prkg1, Grm8, Nfatc1
## L3mbtl4, P3h2, Mgll, Shisa6, Camk1d, Stac, Eepd1, Tac1, Col27a1, Lmo7
## Met, Hcn1, Ntn1, Lamc3, Egfros, Dock1, Fam189a1, Pbx1, Snx7, Grm7
length(setdiff(VariableFeatures(object = GEX.seur),
c(MT_gene,DIG,CC_gene)))
## [1] 1112
setdiff(VariableFeatures(object = GEX.seur),
c(MT_gene,DIG,CC_gene))[1:300]
## [1] "Cntnap2" "Mgat4c" "Zfp804a" "Cntn5" "Nmu" "Robo2"
## [7] "Cpne4" "Sgcz" "Lingo2" "Cdh8" "Kcnb2" "Klhl1"
## [13] "Sst" "Pcdh9" "Pcdh11x" "Dgkg" "Myh11" "Adgrg6"
## [19] "Ntng1" "Kctd16" "Csmd1" "Vip" "Gpr149" "Brinp3"
## [25] "Nrxn3" "Nrg1" "Ebf1" "Grm7" "Tmeff2" "Car10"
## [31] "Astn2" "Kirrel3" "Sema3e" "Asic2" "Gpc6" "Mmrn1"
## [37] "Galntl6" "Chsy3" "Wdr17" "Ano5" "Gal" "Kcnq5"
## [43] "Ano2" "Gpm6a" "Prkg1" "Unc5d" "Rbfox1" "Dgki"
## [49] "Rab27b" "Cdh18" "Gpc5" "Schip1" "March1" "Cdh9"
## [55] "Pdzrn4" "Plxna4" "Itgb6" "Chrm3" "M1ap" "Tac1"
## [61] "Tafa1" "Plcl1" "Pbx3" "Kcnh7" "Cpa6" "Grik1"
## [67] "Lrp1b" "Rbpms" "Spock3" "L3mbtl4" "Trps1" "Xirp2"
## [73] "Ptprt" "Tafa2" "Clstn2" "Fut9" "Grin3a" "Ntrk3"
## [79] "Pcdh10" "Opcml" "Zfp804b" "Col25a1" "Dcc" "Ccbe1"
## [85] "Grm8" "Nxph2" "Pde7b" "Nxph1" "Zfhx3" "Cacna2d3"
## [91] "Grp" "Pde4d" "Cdh6" "Reln" "Rfx6" "Penk"
## [97] "St8sia2" "Cdh19" "Gna14" "Shisa6" "Adgrl4" "Scnn1a"
## [103] "Fstl4" "Myl1" "Kctd8" "Unc13c" "Unc5c" "Kcnd2"
## [109] "Vgll3" "Alk" "Actg2" "Inpp4b" "Calcb" "Cysltr2"
## [115] "Robo1" "Trhde" "Erbb4" "Smtn" "Adarb2" "Dgkb"
## [121] "Piezo2" "Kif26b" "Prox1" "Cmah" "Sema5a" "Dock10"
## [127] "Abca9" "Skap1" "Hpse2" "Casp8" "Dlc1" "Tenm4"
## [133] "Trp53cor1" "Avil" "Cfh" "Efr3a" "Bloc1s6os" "Rbm46"
## [139] "Muc16" "Ndufa4l2" "Ccdc68" "Chrna9" "Lgals9" "Galnt18"
## [145] "Cntn4" "Hcn1" "Bmpr1b" "Dock1" "Dlgap1" "Slc4a4"
## [151] "Foxp2" "Nell1" "Luzp2" "Dab1" "Stac" "Kcnmb2"
## [157] "Il1rapl1" "Rmst" "Csmd3" "Pde1a" "Etl4" "Pde4b"
## [163] "Tenm2" "Mast4" "Cux2" "B3galt1" "Serpini1" "Serpinb8"
## [169] "Maf" "Prdm6" "Cadm2" "Tacr1" "Rarb" "Egfem1"
## [175] "Ssc4d" "Rtl4" "Grm1" "Tnr" "Clca3a2" "Fgf14"
## [181] "Eya1" "Itih5" "Mecom" "Bank1" "Cyp2j13" "Smpdl3b"
## [187] "Fbxw16" "Gabra1" "Kcnh3" "Meltf" "Cd86" "Sorcs1"
## [193] "Pcdh7" "Pgm5" "Fhit" "Nos1" "Egflam" "Acss1"
## [199] "Thsd4" "Olfr889" "Cdh12" "Dcn" "Hpgd" "Prune2"
## [205] "Egfr" "Galr1" "Syt10" "Moxd1" "Rgs6" "Npas3"
## [211] "Dach1" "Cerkl" "Ano1" "Thsd7a" "Met" "Ghr"
## [217] "Adam33" "Eepd1" "Adamts9" "Mgarp" "Exoc3l2" "Nlrp3"
## [223] "Rasgrf1" "Camk1d" "Cntn3" "Dpp10" "Ntm" "Ptprk"
## [229] "Htr2c" "Ntrk2" "Pdyn" "Olfm3" "Serpinb5" "Serpini2"
## [235] "Gbp2" "Gimap6" "Bcat1" "Lrrc37a" "Mei1" "Anxa1"
## [241] "Pik3ap1" "Flrt2" "Slit3" "P3h2" "Synpr" "Arhgap6"
## [247] "Adam2" "Nog" "Platr7" "Ephb1" "Egfros" "Rerg"
## [253] "Gng2" "Hmcn1" "Lama2" "Olfr78" "Ror1" "Fbxl7"
## [259] "Hs3st4" "Cntnap5b" "Calb1" "Satb2" "Crispld1" "Kcnn2"
## [265] "Pak7" "Slc35f4" "Stab2" "Bnc2" "Tcf7l2" "C79798"
## [271] "Cftr" "Thsd7b" "Slc2a13" "Abca8a" "Arhgap28" "Ttc29"
## [277] "Fendrr" "Lrrc18" "Col22a1" "Ptcra" "Mep1b" "Adamts13"
## [283] "Ier5l" "Pdgfra" "Mir9-3hg" "Smim31" "Spag5" "Cfap53"
## [289] "Col12a1" "Vcan" "Tmem132c" "Palld" "Nfatc1" "Gabrb1"
## [295] "Folh1" "Rasgef1b" "Sorcs3" "Galnt5" "Pakap" "Tmem132d"
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "orig.ident") +
DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "orig.ident")
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "FB.info", cols = color.FB) +
DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "FB.info", cols = color.FB)
DimHeatmap(GEX.seur, dims = 1:12, cells = 1500, balanced = TRUE,ncol = 4)
ElbowPlot(GEX.seur,ndims = 50)
PCs <- 1:25
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs, k.param = 20)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, method = 'igraph' ,resolution = 2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 13055
## Number of edges: 544702
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7949
## Number of communities: 27
## Elapsed time: 1 seconds
GEX.seur <- RunTSNE(GEX.seur, dims=PCs, complexity = 100)
GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 20, seed.use = 145)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 21:57:27 UMAP embedding parameters a = 0.9922 b = 1.112
## 21:57:27 Read 13055 rows and found 25 numeric columns
## 21:57:27 Using Annoy for neighbor search, n_neighbors = 20
## 21:57:27 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 21:57:28 Writing NN index file to temp file C:\Users\Shaorui\AppData\Local\Temp\RtmpgLNYVE\file607824eb3f72
## 21:57:28 Searching Annoy index using 1 thread, search_k = 2000
## 21:57:31 Annoy recall = 100%
## 21:57:32 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 20
## 21:57:33 Initializing from normalized Laplacian + noise (using irlba)
## 21:57:33 Commencing optimization for 200 epochs, with 385114 positive edges
## 21:57:46 Optimization finished
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)
FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA","percent.mt","percent.rb"))
DimPlot(GEX.seur, label = F, pt.size = 0.05, repel = F, reduction = 'umap', group.by = "FB.info", split.by = "FB.info",
ncol = 4, cols = color.FB)
GEX.seur$cnt <- factor(gsub(".1$|.2$|.3$|.4$","",as.character(GEX.seur$FB.info)),
levels = c("CTL",
"CKO"))
color.cnt <- color.FB[c(1,5)]
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "seurat_clusters") +
DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)
library(DoubletFinder)
for(i in 1:length(sweep.res.list)){
if(length(sweep.res.list[[i]]$pANN[is.nan(sweep.res.list[[i]]$pANN)]) !=0){
if(i!=1){
sweep.res.list[[i]] <- sweep.res.list[[i-1]]
}else(
sweep.res.list[[i]] <- sweep.res.list[[i+1]]
)
}
}
sweep.stats <- summarizeSweep(sweep.res.list, GT=FALSE)
bcmvn <- find.pK(sweep.stats)
## NULL
pk_v <- as.numeric(as.character(bcmvn$pK))
pk_good <- pk_v[bcmvn$BCmetric==max(bcmvn$BCmetric)]
# specify expected doublet number
nExp_poi <- round(0.05*length(colnames(GEX.seur)))
GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good,
nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 4352 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## Centering and scaling data matrix
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.05"
# specify expected doublet number
nExp_poi <- round(0.1*length(colnames(GEX.seur)))
GEX.seur <- doubletFinder_v3(GEX.seur, PCs = PCs, pN = 0.25, pK = pk_good,
nExp = nExp_poi, reuse.pANN = FALSE, sct = FALSE)
## [1] "Creating 4352 artificial doublets..."
## [1] "Creating Seurat object..."
## [1] "Normalizing Seurat object..."
## [1] "Finding variable genes..."
## [1] "Scaling data..."
## [1] "Running PCA..."
## [1] "Calculating PC distance matrix..."
## [1] "Computing pANN..."
## [1] "Classifying doublets.."
colnames(GEX.seur@meta.data)[ncol(GEX.seur@meta.data)] <- "DoubletFinder0.1"
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.05") +
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.1")
markers.new.ss <- list(EMN=c("Chat","Bnc2","Tox","Ptprt",
"Gfra2","Oprk1","Adamtsl1",
"Fbxw15","Fbxw24","Chrna7",
"Satb1","Itga6","Cntnap5b",
"Pgm5","Chgb",
"Nxph1",
"Gabrb1","Glp2r","Nebl",
"Lrrc7",
"Ryr3","Eda",
"Hgf","Lama2","Efnb2",
"Tac1",
"Kctd8",
"Ptn",
"Ntrk2","Penk","Itgb8",
"Fut9","Nfatc1","Egfr","Pdia5",
"Ahr","Mgll",
"Aff3",
"Chrm3"
),
IMN=c("Nos1","Kcnab1",
"Gfra1","Etv1",
"Man1a","Airn",
"Adcy2",
"Col25a1",
"Cmah","Creb5","Vip","Pde1a",
"Ebf1","Gpc5","Mid1","Igfbp5",
"Ppara",
"Pcdh11x","Adcy8","Grp"
),
IN=c("Npas3","Synpr","St18","Gal",
"Nova1",
"Cdh10","Kcnk13",
"Neurod6","Moxd1","Sctr",
"Piezo1","Vipr2","Adamts9","Sst","Kcnn2"
),
IPAN=c("Calb2","Adcy1","Calcb","Nmu","Adgrg6","Pcdh10",
"Ngfr","Galr1","Il7","Aff2",
"Gpr149","Itgb6","Met","Itgbl1",
"Cdh6","Cdh8",
"Clstn2","Ano2","Ntrk3",
"Cpne4","Vwc2l","Cdh9",
"Car10","Dcc",
"Scgn",
"Vcan","Cck","Piezo2","Kcnh7",
"Rerg","Bmpr1b","Skap1","Ntng1",
"Tafa2","Nxph2"),
check0407 = c("Gcg","Glp2r","Insl5","Nts",
"Ntsr1","Ntsr2","Pyy",#"Sst",
"Gip","Sct","Sctr","Ghrl"),
others = c("Sox10","Plp1","Gfap","Rxrg",
"Acta2","Myh11","Pdgfra","Bmp5"))
check0407 = c("Gcg","Glp2r","Insl5","Nts",
"Ntsr1","Ntsr2","Pyy","Sst",
"Gip","Sct","Sctr","Ghrl")
check.markers.ss <- as.vector(unlist(markers.new.ss))
check.markers.ss
## [1] "Chat" "Bnc2" "Tox" "Ptprt" "Gfra2" "Oprk1"
## [7] "Adamtsl1" "Fbxw15" "Fbxw24" "Chrna7" "Satb1" "Itga6"
## [13] "Cntnap5b" "Pgm5" "Chgb" "Nxph1" "Gabrb1" "Glp2r"
## [19] "Nebl" "Lrrc7" "Ryr3" "Eda" "Hgf" "Lama2"
## [25] "Efnb2" "Tac1" "Kctd8" "Ptn" "Ntrk2" "Penk"
## [31] "Itgb8" "Fut9" "Nfatc1" "Egfr" "Pdia5" "Ahr"
## [37] "Mgll" "Aff3" "Chrm3" "Nos1" "Kcnab1" "Gfra1"
## [43] "Etv1" "Man1a" "Airn" "Adcy2" "Col25a1" "Cmah"
## [49] "Creb5" "Vip" "Pde1a" "Ebf1" "Gpc5" "Mid1"
## [55] "Igfbp5" "Ppara" "Pcdh11x" "Adcy8" "Grp" "Npas3"
## [61] "Synpr" "St18" "Gal" "Nova1" "Cdh10" "Kcnk13"
## [67] "Neurod6" "Moxd1" "Sctr" "Piezo1" "Vipr2" "Adamts9"
## [73] "Sst" "Kcnn2" "Calb2" "Adcy1" "Calcb" "Nmu"
## [79] "Adgrg6" "Pcdh10" "Ngfr" "Galr1" "Il7" "Aff2"
## [85] "Gpr149" "Itgb6" "Met" "Itgbl1" "Cdh6" "Cdh8"
## [91] "Clstn2" "Ano2" "Ntrk3" "Cpne4" "Vwc2l" "Cdh9"
## [97] "Car10" "Dcc" "Scgn" "Vcan" "Cck" "Piezo2"
## [103] "Kcnh7" "Rerg" "Bmpr1b" "Skap1" "Ntng1" "Tafa2"
## [109] "Nxph2" "Gcg" "Glp2r" "Insl5" "Nts" "Ntsr1"
## [115] "Ntsr2" "Pyy" "Gip" "Sct" "Sctr" "Ghrl"
## [121] "Sox10" "Plp1" "Gfap" "Rxrg" "Acta2" "Myh11"
## [127] "Pdgfra" "Bmp5"
check.markers.sss <- check.markers.ss[check.markers.ss %in% rownames(GEX.seur)]
pm.All_raw <- DotPlot(GEX.seur, features = rev(unique(rev(check.markers.sss))), group.by = "seurat_clusters",assay = "RNA",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="check previous markers")
pm.All_raw
FeaturePlot(GEX.seur, features = c(#"Chat"
"Bnc2","Fut9","Nos1","Vip",
"Gal","Moxd1","Sst","Calcb",
"Nmu","Cdh9","Piezo2","Nxph2"),
ncol = 4)
VlnPlot(GEX.seur, features = c("Cdh8","Cdh9"), group.by = "seurat_clusters",ncol = 1)
GEX.seur$sort_clusters <- factor(as.character(GEX.seur$seurat_clusters),
levels = c(6,16,10,14, 3, 5, 12, 17, # EMN
1,8,0,11, 15,4,20, 18, 7, # IMN
22, 23, 2,21, # IN
9, 25, 19, 13, # IPAN
24, # Mix
26 # Glia/Fib/SMC (also as contaminated mix)
))
pm.All_sort <- DotPlot(GEX.seur, features = rev(unique(rev(check.markers.sss))), group.by = "sort_clusters",assay = "RNA",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="check previous markers")
pm.All_sort
GEX.seur$preAnno <- as.character(GEX.seur$seurat_clusters)
GEX.seur$preAnno[GEX.seur$preAnno %in% c(6,16,10,14)] <- "EMN1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(3)] <- "EMN2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(5)] <- "EMN3"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(12)] <- "EMN4"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(17)] <- "EMN5"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(1,8,0,11)] <- "IMN1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(15,4,20)] <- "IMN2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(18)] <- "IMN3"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(7)] <- "IMN4"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(22)] <- "IN1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(23)] <- "IN2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(2,21)] <- "IN3"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(9)] <- "IPAN1"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(25)] <- "IPAN2"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(19)] <- "IPAN3"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(13)] <- "IPAN4"
GEX.seur$preAnno[GEX.seur$preAnno %in% c(24,26)] <- "Mix"
GEX.seur$preAnno <- factor(GEX.seur$preAnno,
levels = c(paste0("EMN",1:5),
paste0("IMN",1:4),
paste0("IN",1:3),
paste0("IPAN",1:4),
"Mix"))
color.pre <-c("#678BB1","#8AB6CE","#3975C1","#669FDF","#4CC1BD",
"#BF7E6B","#D46B35","#F19258","#FF8080",
"#BDAE8D","#BD66C4","#C03778",
"#97BA59","#DFAB16","#2BA956","#9FE727",
"red")
DimPlot(GEX.seur, reduction = "umap", label = T, group.by = "preAnno", cols = color.pre) +
DimPlot(GEX.seur, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)
pm.All_pre <- DotPlot(GEX.seur, features = rev(unique(rev(check.markers.sss))), group.by = "preAnno",assay = "RNA",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="check previous markers")
pm.All_pre
#saveRDS(GEX.seur, "./GEX0925.seur.preAnno.rds")
GEX.pure <- subset(GEX.seur,subset = preAnno != "Mix" & DoubletFinder0.1 == "Singlet")
GEX.pure
## An object of class Seurat
## 20978 features across 11666 samples within 1 assay
## Active assay: RNA (20978 features, 1112 variable features)
## 3 dimensional reductions calculated: pca, tsne, umap
DimPlot(GEX.pure, reduction = "umap", label = T, group.by = "preAnno", cols = color.pre) +
DimPlot(GEX.pure, reduction = "umap", label = F, group.by = "cnt", cols = color.cnt)
pm.All_pre <- DotPlot(GEX.pure, features = rev(unique(rev(check.markers.sss))), group.by = "preAnno",assay = "RNA",
cols = c("midnightblue","darkorange1")) +
theme(axis.text.x = element_text(angle = 60, hjust = 1, vjust = 1,size = 9.15))+ scale_y_discrete(limits=rev) + labs(title="check previous markers")
pm.All_pre
GEX.pure$rep <- paste0("rep",
gsub("CTL|CKO","",as.character(GEX.pure$FB.info)))
GEX.pure$FB.info <- factor(as.character(GEX.pure$FB.info),
levels = c(paste0("CTL.",1:4),
paste0("CKO.",1:4)))
GEX.pure$preAnno <- factor(as.character(GEX.pure$preAnno),
levels = levels(GEX.seur$preAnno)[1:16])
cowplot::plot_grid(
pheatmap::pheatmap(table(FB.info=GEX.pure$FB.info,
clusters=GEX.pure$preAnno),
main = "Cell Count",
gaps_row = c(4),
gaps_col = c(5,9,12),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f", legend = F, silent = T)$gtable,
pheatmap::pheatmap(100*rowRatio(table(FB.info=GEX.pure$FB.info,
clusters=GEX.pure$preAnno)),
main = "Cell Ratio",
gaps_row = c(4),
gaps_col = c(5,9,12),
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.1f", legend = F, silent =T)$gtable,
ncol = 1)
stat_preAnno <- GEX.pure@meta.data[,c("cnt","rep","preAnno")]
stat_preAnno.s <- stat_preAnno %>%
group_by(cnt,rep,preAnno) %>%
summarise(count=n()) %>%
mutate(prop= count/sum(count)) %>%
ungroup
## `summarise()` has grouped output by 'cnt', 'rep'. You can override using the
## `.groups` argument.
pcom.preAnno <- stat_preAnno.s %>%
ggplot(aes(x = preAnno, y = 100*prop, color=cnt)) +
geom_bar(stat="summary", fun="mean", position = position_dodge(0.75), width = 0.58, fill="white") +
theme_classic(base_size = 15) +
scale_color_manual(values = c("skyblue","pink"), name="") +
labs(title="Cell Composition of Colon data, preAnno: CR7d CTL vs CKO", y = "Proportion") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 9.6)) +
geom_point(aes(x=preAnno, y = 100*prop, color= cnt),
position = position_dodge(0.75),
shape=16,alpha=0.75,size=2.15,
stroke=0.15, show.legend=F)
pcom.preAnno
glm.nb - anova.LRT
Sort.N <- c("CTL","CKO")
glm.nb_anova.preAnno <- list()
for(x1 in 1:2){
for(x2 in 1:2){
if(x2>x1){
stat_preAnno.s_N <- stat_preAnno.s %>% filter(cnt %in% c(Sort.N[x1],Sort.N[x2]))
total.N <- stat_preAnno.s_N %>%
group_by(cnt, rep) %>%
summarise(total=sum(count)) %>% ungroup() %>% as.data.frame()
rownames(total.N) <- paste0(as.character(total.N$cnt),
"_",
as.character(total.N$rep))
stat_preAnno.s_N$total <- total.N[paste0(stat_preAnno.s_N$cnt,
"_",
stat_preAnno.s_N$rep),"total"]
glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- list()
for(AA in levels(stat_preAnno.s_N$preAnno)){
glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]][[AA]] <- tryCatch({
aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=1000)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error=function(e){
tryCatch({
aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=100)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error=function(e){
tryCatch({
aaa <- stat_preAnno.s_N %>% filter(preAnno==AA)
aaa.glmnb <- MASS::glm.nb(count ~ cnt + offset(log(total)), data = aaa, maxit=10)
aaa.anova <- anova(aaa.glmnb, test = "LRT")
aaa.anova$'Pr(>Chi)'[2]
}, error = function(e){
NA
})
})
})
}
glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]] <- unlist(glm.nb_anova.preAnno[[paste0(Sort.N[x1],"vs",Sort.N[x2])]])
}
}
}
glm.nb_anova.preAnno_df <- t(data.frame(Reduce(function(x,y){rbind(x,y)},glm.nb_anova.preAnno)))
rownames(glm.nb_anova.preAnno_df) <- names(glm.nb_anova.preAnno)
colnames(glm.nb_anova.preAnno_df) <- gsub("X","C",colnames(glm.nb_anova.preAnno_df))
glm.nb_anova.preAnno_df
## EMN1 EMN2 EMN3 EMN4 EMN5 IMN1 IMN2
## CTLvsCKO 0.0194322 0.5587671 0.3886185 0.3822187 0.4662382 0.4837981 0.5977187
## IMN3 IMN4 IN1 IN2 IN3 IPAN1
## CTLvsCKO 0.3725829 0.5909909 0.04770969 0.4219428 0.3814654 0.465705
## IPAN2 IPAN3 IPAN4
## CTLvsCKO 0.002575981 0.6452318 0.6279909
round(glm.nb_anova.preAnno_df,4)
## EMN1 EMN2 EMN3 EMN4 EMN5 IMN1 IMN2 IMN3 IMN4 IN1
## CTLvsCKO 0.0194 0.5588 0.3886 0.3822 0.4662 0.4838 0.5977 0.3726 0.591 0.0477
## IN2 IN3 IPAN1 IPAN2 IPAN3 IPAN4
## CTLvsCKO 0.4219 0.3815 0.4657 0.0026 0.6452 0.628